clear
set more off

* Set working folder to main folder

import delimited using input\CUSP\patents_fyear_iyear_from_1900.csv, clear

keep if fyear >= 1916 & fyear <= 1920

keep patnum fyear fmonth 

drop if fyear == . | fmonth == . 

save "input\migration_of_inventors\temp.dta", replace

import delimited using input\CUSP\help_files\unique_ids\patents_inventor_name_location_id_90.csv, clear

gen aaa1 = substr(id,1,1)
encode aaa1, gen(bbb1)
gen aaa2 = substr(id,2,.)
destring aaa2, gen(bbb2)
gen inventor_id = 1000000*bbb1 + bbb2
drop aaa* bbb*

keep patnum inv_name inv_city inv_state inventor_id

duplicates drop patnum inv_name, force

replace inv_name = lower(inv_name)
replace inv_city = lower(inv_city)
replace inv_state = lower(inv_state)

bys patnum: gen fraction_of_patent = 1/_N

merge m:1 patnum using "input\migration_of_inventors\temp.dta", keep(match) nogen

gen fyear_fmonth = 100*fyear + fmonth

save "input\migration_of_inventors\temp1.dta", replace

******************************************************
*** STAYERS:
*** Generate dataset of patents filed only by stayers:

*** We now have a patent-level dataset with inventor-id, and indication of city and state

*** Now, we want to:

** 1) Match inv_city with one of the 50 NPI cities, and keep only the ones with an NPI city:

* First, the 43 cities:
import delimited using input\CUSP\inv_patents_assigned_to_npi_cities.csv, clear

replace inv_name = lower(inv_name)
replace inv_city = lower(inv_city)
replace inv_state = lower(inv_state)

keep patnum inv_name npi_id

duplicates drop patnum inv_name, force

merge 1:1 patnum inv_name using "input\migration_of_inventors\temp1.dta", keep(match) nogen
save "input\migration_of_inventors\temp3.dta", replace

* Then, the 7 extra cities:
import delimited using input\CUSP\inv_patents_assigned_to_cls_cities.csv, clear

replace inv_name = lower(inv_name)
replace inv_city = lower(inv_city)
replace inv_state = lower(inv_state)

keep patnum inv_name cls_id

duplicates drop patnum inv_name, force

merge 1:1 patnum inv_name using "input\migration_of_inventors\temp1.dta", keep(match) nogen

append using "input\migration_of_inventors\temp3.dta"
* NOTE: Here there are duplicates (patents with npi_id also have a cls_id). But this will be fixed in the next step
save "input\migration_of_inventors\temp3.dta", replace

* Now, put everything together:

use "output\cities_expandedpatents.dta", clear
keep if gen_id!=. & npi_id!=.
keep gen_id npi_id city
duplicates drop
save "input\migration_of_inventors\temp4a.dta", replace

use "output\cities_expandedpatents.dta", clear
keep if gen_id!=. & cls_id!=.
keep gen_id cls_id city
duplicates drop
save "input\migration_of_inventors\temp4b.dta", replace

use "input\migration_of_inventors\temp3.dta", clear
merge m:1 npi_id using "input\migration_of_inventors\temp4a.dta", keep(match) nogen
save "input\migration_of_inventors\temp5.dta", replace

use "input\migration_of_inventors\temp3.dta", clear
merge m:1 cls_id using "input\migration_of_inventors\temp4b.dta", keep(match) nogen
append using "input\migration_of_inventors\temp5.dta"
save "input\migration_of_inventors\temp5.dta", replace

** 2) Identify "stayers" as inventor-id's that appear both before and after Sept 1918

use "input\migration_of_inventors\temp5.dta", clear

bys inventor_id gen_id: egen min_time = min(fyear_fmonth)
bys inventor_id gen_id: egen max_time = max(fyear_fmonth)

keep if min_time < 191809 & max_time >= 191809

** 3) Count patents of "stayers" by npi-city:

bys fyear fmonth gen_id: egen totpat_stayers = sum(fraction_of_patent)

keep fyear fmonth totpat city gen_id

duplicates drop

sort gen_id fyear fmonth

export delimited using input\migration_of_inventors\stayers_city_totpat_fyear_fmonth.csv, replace

erase input\migration_of_inventors\temp.dta
erase input\migration_of_inventors\temp1.dta
erase input\migration_of_inventors\temp3.dta 
erase input\migration_of_inventors\temp4a.dta 
erase input\migration_of_inventors\temp4b.dta 
erase input\migration_of_inventors\temp5.dta
